################################################################################-
# Replication File for Wratil, Wäckerle and Proksch: Government Rhetoric and the 
# Representation of Public Opinion in International Negotiations
#
# This script runs the analysis presented in Appendix G (Mentions of the Public).
#
# Additionally, it produces the following graphs and tables:
# Table C1
# Table C2
# Table C3
# Table C4
# Figure C1
# Figure C2
################################################################################-

library(quanteda)  #version 3.2.1
library(tidyverse) #version 1.3.2
library(lme4)      #version 1.1-30
library(sjPlot)    #version 2.8.11

dict.public <- dictionary(list(public= c("public*","people*","citizen*","public_opinion",
                                         "voter*","constituent*","inhabitant*",
                                         "resident*","taxpayer*")))

load(file="generated_data/corpus_for_final_analysis.RData")

corpus_orig_text <- corp_seg2 %>% 
  docvars() %>% 
  corpus(text_field = "text_original") 

kwic.save <- corpus_orig_text %>% 
  tokens() %>% 
  kwic(pattern = dict.public,window = 20)

write.csv(x = kwic.save ,file = "data/kwic_to_code.csv")
kwic_coding <- read.csv("data/KWIC APSR Coding.csv")

kwic_coding <- kwic_coding %>% 
  mutate(code_word = case_when(
    code == "1" ~ "Noun, refers to a general, vague public",
    code == "2" ~ "Noun, refers to national/European public",
    code == "3" ~ "Part of set term, such as 'public procurement', 'public prosecutor'",
    code == "4" ~ "Adjective",
    code == "5" ~ "Other"
  ))

kwic_codes <- kwic_coding  %>% 
  group_by(code_word) %>% 
  summarise("Number of Mentions" = n(),
            "Share of Mentions" = round(n()*100/nrow(kwic_coding),1)) %>% 
  arrange(-`Number of Mentions`) %>% 
  bind_rows(
    kwic_coding %>% 
      summarise("Number of Mentions" = n(),
                "Share of Mentions" = round(n()*100/nrow(kwic_coding),1)) %>% 
      add_column(`code_word` = "TOTAL",.before = 1)
  )

public_codes <- kwic_coding  %>% 
  #filter(code!=5) %>% 
  group_by(keyword) %>% 
  summarise("Number of Mentions" = n(),
            "Share of Mentions" = round(n()*100/nrow(kwic_coding),1)) %>% 
  arrange(-`Number of Mentions`) %>% 
  bind_rows(
    kwic_coding %>% 
      summarise("Number of Mentions" = n(),
                "Share of Mentions" = round(n()*100/nrow(kwic_coding),1)) %>% 
      add_column(`keyword` = "TOTAL",.before = 1)
    
  )

# This is Table C1
xtable::xtable(public_codes)%>% print(include.rownames=FALSE)

# This is Table C2
xtable::xtable(kwic_codes) %>% print(include.rownames=FALSE)

kwic_doclevel <- kwic_coding %>% 
  group_by(docname) %>% 
  summarise(coded = n()) %>% 
  left_join(kwic_coding %>% 
              filter(code%in%c(1,2)) %>% 
              group_by(docname) %>% 
              summarise(coded_nouns = n())) 
kwic_doclevel$coded_nouns[is.na(kwic_doclevel$coded_nouns)] <- 0

public.vars <- docvars(corpus_orig_text) %>% 
  cbind(docnames(corpus_orig_text)) %>% 
  mutate(Actor_Transcription = paste(Actor,Transcription,sep="_")) %>% 
  dplyr::rename("docname" = 'docnames(corpus_orig_text)') %>% 
  left_join(kwic_doclevel)

public.vars$coded[is.na(public.vars$coded)] <- 0
public.vars$coded_nouns[is.na(public.vars$coded_nouns)] <- 0

table(public.vars$coded,public.vars$coded_nouns,exclude=NULL)
table(public.vars$coded_nouns,exclude=NULL)

table(public.vars$coded>0)
table(public.vars$coded_nouns>0)
prop.table(table(public.vars$coded_nouns>0))

comb_speeches <- public.vars %>% 
  group_by(Actor_Transcription) %>% 
  summarise(coded_nouns_mentioned = sum(coded_nouns),
            coded_all_mentioned = sum(coded))

prop.table(table(comb_speeches$coded_nouns_mentioned>0))
prop.table(table(comb_speeches$coded_all_mentioned>0))

public.vars$coded_nouns_bin <- ifelse(public.vars$coded_nouns>0,1,0)
public.vars$coded_bin <- ifelse(public.vars$coded>0,1,0)
m3_nouns_bin <- glmer(coded_nouns_bin ~ gov_eu_supporter*image_lag6m_scaled+
                        gov_lr_cmp_static_scaled+eu_receipts_gdp_scaled+budget_any+unanimity_any+
                        unemployment_scaled+inflation_scaled+north_south+
                        Council_Config_final+Negotiation_Stage+
                        (1|Actor)+(1|Transcription) , family = binomial(link = "logit"),
                      control = glmerControl(optimizer = "bobyqa"),
                      nAGQ = 1,data = public.vars)

summary(m3_nouns_bin)

m3_bin <- glmer(coded_bin ~ gov_eu_supporter*image_lag6m_scaled+
                  gov_lr_cmp_static_scaled+eu_receipts_gdp_scaled+budget_any+unanimity_any+
                  unemployment_scaled+inflation_scaled+north_south+
                  Council_Config_final+Negotiation_Stage+
                  (1 | Actor)+(1|Transcription) , family = binomial(link = "logit"),
                control = glmerControl(optimizer = "bobyqa"),
                nAGQ = 1,data = public.vars)
summary(m3_bin)

m.plot.public <- sjPlot::plot_model(m3_bin,type="pred",terms = c("image_lag6m_scaled[all]","gov_eu_supporter"))
plot.dat <- m.plot.public$data

m.plot.public.nouns <- sjPlot::plot_model(m3_nouns_bin,type="pred",terms = c("image_lag6m_scaled[all]","gov_eu_supporter"))
plot.dat.nouns <- m.plot.public.nouns$data

aggregate(image_lag6m_scaled~gov_eu_supporter,public.vars,range)

plot.dat <- plot.dat %>% filter(group=="Europhile Government" & x > -3.21 & x < 2.35 |
                                  group=="Eurosceptic Government" & x > -2.61 & x < 1.59)

plot.dat$group <- plot.dat$group %>% 
  recode("Europhile Government" = "Pro-EU Government")

plot.dat.nouns <- plot.dat.nouns %>% filter(group=="Europhile Government" & x > -3.21 & x < 2.35 |
                                              group=="Eurosceptic Government" & x > -2.61 & x < 1.59)

plot.dat.nouns$group <- plot.dat.nouns$group %>% 
  recode("Europhile Government" = "Pro-EU Government")

p <- ggplot(plot.dat,aes(x=x,y=predicted,ymin=conf.low,ymax=conf.high))+
  geom_smooth(se=F,size=0.75,colour="black")+
  geom_smooth(aes(y=conf.low),size=0.75,se=F,lty="dashed",colour="black")+
  geom_smooth(aes(y=conf.high),size=0.75,se=F,lty="dashed",colour="black")+
  facet_wrap(~group)+
  labs(x="Public Image of the EU (z-score)",y="Predicted Likelihood of \nMentioning of Public")+
  theme_bw()+
  theme(strip.background = element_rect(fill = "white", colour = NA),
        strip.text = element_text(colour = "black",face = "bold"),
        axis.title = element_text(size=8,colour="black"),
        axis.text = element_text(colour = "black"))

p.nouns <- ggplot(plot.dat.nouns,aes(x=x,y=predicted,ymin=conf.low,ymax=conf.high))+
  geom_smooth(se=F,size=0.75,colour="black")+
  geom_smooth(aes(y=conf.low),size=0.75,se=F,lty="dashed",colour="black")+
  geom_smooth(aes(y=conf.high),size=0.75,se=F,lty="dashed",colour="black")+
  facet_wrap(~group)+
  labs(x="Public Image of the EU (z-score)",y="Predicted Likelihood of \nMentioning of Public")+
  theme_bw()+
  theme(strip.background = element_rect(fill = "white", colour = NA),
        strip.text = element_text(colour = "black",face = "bold"),
        axis.title = element_text(size=8,colour="black"),
        axis.text = element_text(colour = "black"))

# This is Figure C1
ggsave(plot = p,"figures_appendix/figure_c_1.eps", width = 6, height = 3.5, units = "in")

# This is Figure C2
ggsave(plot = p.nouns,"figures_appendix/figure_c_2.eps", width = 6, height = 3.5, units = "in")

stargazer::stargazer(m3_bin,m3_nouns_bin, type="text")

# This is Table C3
stargazer::stargazer(m3_bin,m3_nouns_bin,
                     dep.var.labels   = c("Referring to the Public","Nouns Referring to the Public"),
                     covariate.labels = c("Eurosceptic Government", "Public Image of the EU", 
                                          "Government Left-Right position", "Net receipts from EU budget",
                                          "Budget issue","Unanimity Required","Unemployment Rate","Inflation Rate",
                                          "Northern Europe","Southern Europe",
                                          "Council configuration: Ecofin","Council configuration: EPSCO",
                                          "Council configuration: ENV","Council configuration: JHA","Negotiation stage: Initial Presentation",
                                          "Negotiation stage: Mixed Negotiations","Negotiation stage: Policy Debates",
                                          "Eurosceptic Government x Public Image of the EU","Intercept"),                    
                     omit.stat = c("f","ll","rsq","adj.rsq","ser"))

res_list_bin <- summary(m3_bin)$coefficients
vcov_mat <- summary(m3_bin)$vcov

L_scep = vcov_mat[vcov_mat@Dimnames[[1]]=="image_lag6m_scaled",vcov_mat@Dimnames[[1]]=="image_lag6m_scaled"]+
  vcov_mat[vcov_mat@Dimnames[[1]]=="gov_eu_supporterEurosceptic Government:image_lag6m_scaled",vcov_mat@Dimnames[[1]]=="gov_eu_supporterEurosceptic Government:image_lag6m_scaled"]+
  2*vcov_mat[vcov_mat@Dimnames[[1]]=="gov_eu_supporterEurosceptic Government:image_lag6m_scaled",vcov_mat@Dimnames[[1]]=="image_lag6m_scaled"]
se_40_scep <- sqrt(L_scep)

se_40_phil <- sqrt(vcov_mat[vcov_mat@Dimnames[[1]]=="image_lag6m_scaled",vcov_mat@Dimnames[[1]]=="image_lag6m_scaled"])

res_40_est <- res_list_bin %>% data.frame()
est_40 <- res_40_est[row.names(res_40_est)=="image_lag6m_scaled","Estimate"]+res_40_est[row.names(res_40_est)=="gov_eu_supporterEurosceptic Government:image_lag6m_scaled","Estimate"]

tab_bin <- data.frame(Estimate = c(res_40_est[row.names(res_40_est)=="image_lag6m_scaled","Estimate"],
                                   est_40),
                      Std.Error = c(se_40_phil,
                                    se_40_scep))

tab_bin$tval <- tab_bin$Estimate/tab_bin$Std.Error
rdf <- nobs(m3_bin) - nrow(res_40_est)
tab_bin$p <- 2 * stats::pt(abs(tab_bin$tval), rdf, lower.tail = FALSE)

res_list_nouns_bin <- summary(m3_nouns_bin)$coefficients
vcov_mat <- summary(m3_nouns_bin)$vcov

L_scep = vcov_mat[vcov_mat@Dimnames[[1]]=="image_lag6m_scaled",vcov_mat@Dimnames[[1]]=="image_lag6m_scaled"]+
  vcov_mat[vcov_mat@Dimnames[[1]]=="gov_eu_supporterEurosceptic Government:image_lag6m_scaled",vcov_mat@Dimnames[[1]]=="gov_eu_supporterEurosceptic Government:image_lag6m_scaled"]+
  2*vcov_mat[vcov_mat@Dimnames[[1]]=="gov_eu_supporterEurosceptic Government:image_lag6m_scaled",vcov_mat@Dimnames[[1]]=="image_lag6m_scaled"]
se_40_scep <- sqrt(L_scep)

se_40_phil <- sqrt(vcov_mat[vcov_mat@Dimnames[[1]]=="image_lag6m_scaled",vcov_mat@Dimnames[[1]]=="image_lag6m_scaled"])

res_40_est <- res_list_nouns_bin %>% data.frame()
est_40 <- res_40_est[row.names(res_40_est)=="image_lag6m_scaled","Estimate"]+res_40_est[row.names(res_40_est)=="gov_eu_supporterEurosceptic Government:image_lag6m_scaled","Estimate"]

tab_nouns_bin <- data.frame(Estimate = c(res_40_est[row.names(res_40_est)=="image_lag6m_scaled","Estimate"],
                                         est_40),
                            Std.Error = c(se_40_phil,
                                          se_40_scep))

tab_nouns_bin$tval <- tab_nouns_bin$Estimate/tab_nouns_bin$Std.Error
rdf <- nobs(m3_nouns_bin) - nrow(res_40_est)
tab_nouns_bin$p <- 2 * stats::pt(abs(tab_nouns_bin$tval), rdf, lower.tail = FALSE)

# This is Table C4
xtable::xtable(bind_rows(tab_bin %>% mutate(Subset = "All",
                                            Government = c("Pro-EU","Eurosceptic")),
                         tab_nouns_bin %>% mutate(Subset = "Nouns Only",
                                                  Government = c("Pro-EU","Eurosceptic"))) %>% 
                 dplyr::rename("t-value" = "tval",
                               "p-value" = "p")) %>% 
  print(include.rownames=FALSE)


